//	ftpcat.cpp  -  ftp site catalog sample code
//
//	This is a part of the MetaKit library.
//	Copyright (c) 1996 Meta Four Software.
//	All rights reserved.
/////////////////////////////////////////////////////////////////////////////

#include "m4kit.h"

#include <io.h>

/////////////////////////////////////////////////////////////////////////////
// Property definitions

	c4_ViewProp		pFiles ("files");
	c4_IntProp		pParent ("parent"),
					pSize ("size"),
					pDate ("date");
	c4_StringProp	pName ("name");

/////////////////////////////////////////////////////////////////////////////
// Reconstruct the full path name from a subdirectory index in the tree

CString fFullPath(c4_View& dirs_, int dirNum_)
{
		// Builds the path of entry dirNum_ by following the "parent" links
		// in dirs_ up to entry 0, which is treated as the root.
	CString result;

		// every level below the root contributes "<name>/" in front
	int index = dirNum_;
	while (index != 0)
	{
		result = pName (dirs_[index]) + "/" + result;
		index = (int) pParent (dirs_[index]);
	}

		// finally the root itself; the result always ends with a slash
	return pName (dirs_[0]) + "/" + result;
}

/////////////////////////////////////////////////////////////////////////////
// Attempt to convert Unix date back to good ol' DOS format, i.e. 7:4:5 bits

int DecodeUnixDate(const CString& buf_)
{
	static CString months = "JanFebMarAprMayJunJulAugSepOctNovDec";

		// extra logic, in case no year was specified
	static int limit = 0, year = 0;
	if (!limit)
	{
			// determine the time 30 days ahead (clock may be a bit off)
		time_t t1 = time(0) + 30 * 24 * 3600L;
		tm* t2 = gmtime(&t1);
			// remember that date as the limit date
		limit = ((t2->tm_year - 80) << 9) |
				((t2->tm_mon + 1) << 5) | t2->tm_mday;
			// remember the default year to use if none is specified
			// this may be a year to far, we'll back up later if needed
		year = t2->tm_year + 1900;
	}

	int m = months.Find(buf_.Left(3));
	if (m % 3 != 0 || buf_[3] != ' ' || buf_[6] != ' ')
		return 0;

	int y = buf_[9] == ':' ? year : atoi(buf_.Mid(8,4));
	if (y < 1980 || y > 2080)
		return 0;

	int x = ((y - 1980) << 9) | ((m / 3 + 1) << 5) | atoi(buf_.Mid(4,2));
		// if the date is within a year into the future, use prev year
	if (x > limit && x - 512 < limit)
		x -= 512;
	return x;
}

/////////////////////////////////////////////////////////////////////////////
// Decode a single line of a Unix-style directory listing

	// Note: there are a *lot* more systems and listing formats out there,
	// but this is only an example, it works fine with several popular sites

char DecodeUnixEntry(const CString& buf_, c4_Row& entry_)
{
		// assume each entry ends with: ' ' <size> ' ' <date> ' ' <name>
	int n = buf_.ReverseFind(' ');

		// Mac filenames can contain ' ', so check in regular place first
	if (n > 55 && buf_[54] == ' ' && buf_[41] == ' ' &&
						DecodeUnixDate(buf_.Mid(42, 12)) != 0)
		n = 54; // date in regular position is ok, so use that instead

	if (n > 40 && buf_[n-13] == ' ')
	{
		pName (entry_) = buf_.Mid(n+1);

		switch (buf_[0])
		{
		case '-':		// regular file
					pDate (entry_) = DecodeUnixDate(buf_.Mid(n-12, 12));
					n = buf_.Left(n-13).ReverseFind(' ');
					if (n > 20)
					{
						pSize (entry_) = atol(buf_.Mid(n));
						return 'f';
					}
					break;

		case 'd':		// directory, but not if name starts with '.'
					if (buf_[n+1] != '.')
						return 'd';
		}
	}

	return 0;
}

/////////////////////////////////////////////////////////////////////////////
// Scan a remote ftp site and return a corresponding structure for it
//
//	This code is extremely simple in terms of tcp/ip stuff, since there
//	aren't any calls - everything is handled by Win95's FTP.EXE program.
//
//	On the other hand, this code is pretty tricky since two pipes are
//	set up to control the execution of this child program. One pipe feeds
//	commands to ftp, the other reads back results and decodes each line.
//
//	For some unknown reason, all server messages are lost. This is not
//	critical, since the list output *does* get through, as well as any
//	other output from commands. That's enough to make this thing work.
//
//	The bottom line is:
//
//		1)  There is no networking stuff in here, FTP.EXE does it all
//		2)	This is built as a console task, so popen is neatly hidden
//		3)	The result is a perfectly usable command-line utility
//		4)	This only works on Win95 (perhaps also on NT 3.51 or 4.0)
//		5)	The port suffix (site::port) is not implemented

c4_View fScanFTP(const char* path_, const char* usr_, const char* pw_)
{
		// path_ is "site[/dir...]"; usr_ and pw_ are passed on unchanged in
		// the "user" command sent to the ftp child process below.
		//
		// The result has one row per directory visited: row 0 is the start
		// directory, every other row has the index of its parent row, and
		// each scanned row holds a nested view of its files sorted by name.

		// everything before the first '/' is the site name
	CString site = path_;
	site = site.SpanExcluding("/");

		// start with a view containing the path (without the site prefix)
		// this becomes row 0, the root that fFullPath stops at
	c4_View dirs;
	dirs.Add(pName [path_ + site.GetLength()]);

		// set up a pipe, this reminds me of the good ol' Unix days...
		// the write side temporarily replaces file descriptor 1 (stdout)
	int ph[2];
	VERIFY(_pipe(ph, 100000, 0) == 0); // allow 100k in the pipeline
	VERIFY(_dup2(ph[1], 1) == 0);
	VERIFY(_close(ph[1]) == 0);
		// now, when the "list" file is read, its data will come from stdout
	FILE* list = fdopen(ph[0], "rt");
	ASSERT(list);

	fprintf(stderr, "Connecting to %s ...\n", (const char*) site);

		// spawn a process, connect, and prepare to pipe commands to it
		// NOTE(review): the site name is appended to the command line as is
	FILE* cmds = _popen("ftp -n " + site, "wt");
	ASSERT(cmds);

		// only the child should have the write side of the pipe open
		// so descriptor 1 of this process is made a duplicate of 2 (stderr)
	VERIFY(_dup2(2, 1) == 0);

		// prepare to do some work by logging in
	fprintf(cmds, "user %s %s\npwd\n", usr_, pw_);
	fflush(cmds);

		// read one line back right now, to make sure connection is ok
		// NOTE(review): the result of this fgets is not checked
	char buf [1024];
	fgets(buf, sizeof buf, list);

    	// This loop "automagically" handles the recursive traversal of all
    	// subdirectories. The trick is that each scan may add new entries
    	// at the end, causing this loop to continue (GetSize() changes!).
    
	int i;

    for (i = 0; i < dirs.GetSize(); ++i)
    {
    	CString path = fFullPath(dirs, i);
		if (path != "/")	// remove the trailing slash
			path = path.Left(path.GetLength() - 1);

			// send two commands to the child process, the second one is
			// needed to produce a trailing line we can wait on
		fprintf(stderr, "%4d: %-65.65s\r", i, (const char*) path);
		fputs("dir " + path + "\npwd\n", cmds);
		fflush(cmds);

		c4_View files;
		c4_Row dir, file;

			// look at each of the returned lines in turn
			// NOTE(review): if fgets fails before a numeric line shows up,
			// the files collected for this directory are never stored
		while (fgets(buf, sizeof buf, list))
		{
				// strip the line ending
			CString temp = buf;
			temp = temp.SpanExcluding("\r\n");

				// a line starting with a non-zero number ends the listing,
				// presumably the reply to the "pwd" command - TODO confirm
			int result = atoi(temp);
			if (result == 0)
			{
//				puts(temp);
				char type = DecodeUnixEntry(temp, file);
				if (type == 'd')
				{
						// append the subdirectory, it gets scanned later on
					pParent (dir) = i;
					pName (dir) = pName (file);
					dirs.Add(dir);
				}
				else if (type == 'f')
				{
					files.Add(file);
				}
			}
			else
			{
					// listing complete, attach the sorted files to this dir
				pFiles (dirs[i]) = files.SortOn(pName);
				break;
			}
		}
	}

		// wipe the progress line and report the number of rows visited
	fprintf(stderr, "%75s\r%4d directories scanned.\n", "", i);

	_pclose(cmds);
	fclose(list);
	
		// The returned object contains the entire directory tree.
		// Everything is automatically destroyed when no longer referenced.	
	return dirs;
}

/////////////////////////////////////////////////////////////////////////////
// Try this on internet: "ftpcat ftp.winsite.com/pub/pc/win95/programr"

int main(int argc, char** argv)
{
		// Command-line entry point: scans the given ftp site and stores the
		// resulting catalog in a datafile. Returns 1 after printing a usage
		// message if the argument count is wrong, and 0 otherwise.

		// an optional second argument names the output file
	const char* dest = "ftpcat.dat";
	if (argc == 3)
	{
		dest = argv[2];
		argc = 2;
	}

	if (argc != 2)
	{
		fprintf(stderr, "Usage: FTPCAT site/path [output]\n"
			"   or: FTPCAT [ftp://][user:passwd@]site[/path] [output]\n");
		return 1;
	}

		// take the URL apart, starting with an optional scheme prefix
	CString arg = argv[1];
	if (arg.Left(6).CompareNoCase("ftp://") == 0)
		arg = arg.Mid(6);

		// anything up to the last '@' is the login info, with a default
	CString id = "anonymous:ftpcat@any.org";
	const int at = arg.ReverseFind('@');
	if (at >= 0)
	{
		id = arg.Left(at);
		arg = arg.Mid(at + 1);
	}

		// the login info is split on its first ':' into user and password
	CString pass;
	const int colon = id.Find(':');
	if (colon >= 0)
	{
		pass = id.Mid(colon + 1);
		id = id.Left(colon);
	}

		// the storage object is set up before the scan starts
	c4_Storage storage (dest, true);

		// scan the ftp site and hand the resulting tree to the storage
	c4_View view = fScanFTP(arg, id, pass);
	storage.Store("dirs", view);

		// data will only be stored when actually committed
	storage.Commit();

	return 0;
}

/////////////////////////////////////////////////////////////////////////////
